Topic: COVID-19 New cases visualisation dashboard: Shiny app
covid = read.csv("owid-covid-data.csv")
# these are indices for each country (an index is just a collection of stocks)
# 'FileEncoding' just cleans the column encoding for this case
#sp500 = read.csv("SPY Historical Data.csv", fileEncoding = 'UTF-8-BOM') # This is for US
#TOPIX = read.csv("TOPIX Historical Data.csv", fileEncoding = 'UTF-8-BOM') # This is for Japan
ASX200 = read.csv("S&P_ASX 200 Historical Data.csv", fileEncoding = 'UTF-8-BOM') # This is for Australia
#NSEI = read.csv("Nifty 50 Historical Data.csv", fileEncoding = 'UTF-8-BOM') # This is for India
#SSEC = read.csv("Shanghai Composite Historical Data.csv", fileEncoding = 'UTF-8-BOM') # this is for China
colnames(covid)## [1] "iso_code"
## [2] "continent"
## [3] "location"
## [4] "date"
## [5] "total_cases"
## [6] "new_cases"
## [7] "new_cases_smoothed"
## [8] "total_deaths"
## [9] "new_deaths"
## [10] "new_deaths_smoothed"
## [11] "total_cases_per_million"
## [12] "new_cases_per_million"
## [13] "new_cases_smoothed_per_million"
## [14] "total_deaths_per_million"
## [15] "new_deaths_per_million"
## [16] "new_deaths_smoothed_per_million"
## [17] "reproduction_rate"
## [18] "icu_patients"
## [19] "icu_patients_per_million"
## [20] "hosp_patients"
## [21] "hosp_patients_per_million"
## [22] "weekly_icu_admissions"
## [23] "weekly_icu_admissions_per_million"
## [24] "weekly_hosp_admissions"
## [25] "weekly_hosp_admissions_per_million"
## [26] "total_tests"
## [27] "new_tests"
## [28] "total_tests_per_thousand"
## [29] "new_tests_per_thousand"
## [30] "new_tests_smoothed"
## [31] "new_tests_smoothed_per_thousand"
## [32] "positive_rate"
## [33] "tests_per_case"
## [34] "tests_units"
## [35] "total_vaccinations"
## [36] "people_vaccinated"
## [37] "people_fully_vaccinated"
## [38] "total_boosters"
## [39] "new_vaccinations"
## [40] "new_vaccinations_smoothed"
## [41] "total_vaccinations_per_hundred"
## [42] "people_vaccinated_per_hundred"
## [43] "people_fully_vaccinated_per_hundred"
## [44] "total_boosters_per_hundred"
## [45] "new_vaccinations_smoothed_per_million"
## [46] "new_people_vaccinated_smoothed"
## [47] "new_people_vaccinated_smoothed_per_hundred"
## [48] "stringency_index"
## [49] "population"
## [50] "population_density"
## [51] "median_age"
## [52] "aged_65_older"
## [53] "aged_70_older"
## [54] "gdp_per_capita"
## [55] "extreme_poverty"
## [56] "cardiovasc_death_rate"
## [57] "diabetes_prevalence"
## [58] "female_smokers"
## [59] "male_smokers"
## [60] "handwashing_facilities"
## [61] "hospital_beds_per_thousand"
## [62] "life_expectancy"
## [63] "human_development_index"
## [64] "excess_mortality_cumulative_absolute"
## [65] "excess_mortality_cumulative"
## [66] "excess_mortality"
## [67] "excess_mortality_cumulative_per_million"
#Dropping NA
covid_clean = covid %>% drop_na(new_cases, new_cases_smoothed, new_vaccinations, new_vaccinations_smoothed, new_vaccinations_smoothed_per_million, population, population_density, median_age, extreme_poverty, total_vaccinations, hospital_beds_per_thousand, human_development_index, new_deaths, new_tests, weekly_icu_admissions_per_million, weekly_icu_admissions)
#This code changes the negative case values in the data set to zero
covid_clean$new_cases[covid_clean$new_cases < 0] <- 0#Dropping NA
covid_AUS <- covid %>% filter(location == "Australia")
covid_clean_AUS = covid_AUS %>% drop_na(new_cases, new_cases_smoothed, new_vaccinations, new_vaccinations_smoothed, new_vaccinations_smoothed_per_million, population, population_density, median_age, extreme_poverty, total_vaccinations, hospital_beds_per_thousand, human_development_index, new_deaths, new_tests, total_tests, total_cases)
#This code changes the negative case values in the data set to zero
covid_clean_AUS$new_cases[covid_clean_AUS$new_cases < 0] <- 0
view(covid_clean_AUS)
#glimpse(covid_clean)
#covid_clean$date = as.Date(covid_clean$date)
#max(covid_clean$date)
#unique(covid_clean$location)#code for more optimized join (to be pasted later)
#class(covid_clean$date)
# using a copy just in case
covid2 <- covid_clean_AUS
#sp500$Date = mdy(sp500$Date)
#TOPIX$Date = mdy(TOPIX$Date)
ASX200$date = mdy(ASX200$Date)
#NSEI$Date = mdy(NSEI$Date)
#SSEC$Date = mdy(SSEC$Date)
#sp500$date = mdy(sp500$Date)
#TOPIX$date = mdy(TOPIX$Date)
ASX200$date = mdy(ASX200$Date)
#NSEI$date = mdy(NSEI$Date)
#SSEC$date = mdy(SSEC$Date)
covid2$date = ymd(covid2$date)
#Temporarily changing the date to character as joining cannot be done with date objects
#Also selecting relevant columns for analysis later
covid2 <- covid2 %>%
transform(covid2, date = as.character(date)) %>%
select(date, new_cases, new_deaths, location, new_vaccinations, new_tests, population, population_density, total_tests, total_vaccinations, total_cases)
#sp500$Date <- as.character(sp500$Date)
#TOPIX$Date <- as.character(TOPIX$Date)
ASX200$date <- as.character(ASX200$date)
#NSEI$Date <- as.character(NSEI$Date)
#SSEC$Date <- as.character(SSEC$Date)
#sp500$date <- as.character(sp500$Date)
#TOPIX$date <- as.character(TOPIX$Date)
ASX200$date <- as.character(ASX200$date)
#NSEI$date <- as.character(NSEI$Date)
#SSEC$date <- as.character(SSEC$Date)
# renaming column so it has same name as the stock market data frames for joining later
#colnames(covid2)[1] = "Date"
# making data frames for each country we select to perform individual joins on each to their respective stock market index
#covid_US <- covid2 %>% filter(location == "United States")
covid_AUS <- covid2 %>% filter(location == "Australia")
#covid_IND <- covid2 %>% filter(location == "India")
#covid_JPN <- covid2 %>% filter(location == "Japan")
#covid_CHN <- covid2 %>% filter(location == "China")
# performing joins
#df_1 = inner_join(sp500, covid_US, by = "Date")
#df_2 = inner_join(TOPIX, covid_JPN, by = "Date")
df_3 = inner_join(ASX200, covid_AUS, by = "date")
#df_4 = inner_join(NSEI, covid_IND, by = "Date")
#df_5 = inner_join(SSEC, covid_CHN, by = "Date")
#df_1 = inner_join(sp500, covid_US, by = "date")
#df_2 = inner_join(TOPIX, covid_JPN, by = "date")
df_3 = inner_join(ASX200, covid_AUS, by = "date")
#df_4 = inner_join(NSEI, covid_IND, by = "date")
#df_5 = inner_join(SSEC, covid_CHN, by = "date")
# vertically joined data set (now one column will store all the values of the respective country index)
# e.g US stores prices relevant to S&P500 and China's prices are relevant to the the SSEC which is based in Shanghai.
#covid_joined <- rbind(df_1, df_2, df_3, df_4, df_5)
# Still need transform relevant column to numeric ect...will do a little later
df_aus <- df_3
#transformation
df_aus$date = as.Date(df_aus$date)
df_aus$Price = as.numeric(gsub(",","",df_aus$Price))
view(df_aus)covid_temp <- covid_clean
covid_temp$month <- strftime(covid_temp$date, "%m")
covid_temp$year <- strftime(covid_temp$date, "%Y")
covid_temp_new_case_aggregate <- aggregate(new_cases_smoothed~month+year,
covid_temp,
FUN = mean)
covid_temp_new_case_aggregate$month_year <- paste(covid_temp_new_case_aggregate$month, covid_temp_new_case_aggregate$year)ggplot(covid_temp_new_case_aggregate, aes(x= new_cases_smoothed, y= month_year)) +
geom_bar(stat = 'identity') + ggtitle("Average New Cases (Smoothed) per Month") +
ylab("month year") + xlab("average new cases")covid_temp = covid_clean %>% select(date,new_people_vaccinated_smoothed)
covid_temp$date <- as.Date(covid_temp$date, format = "%Y-%m-%d")
covid_temp_new_vaccinated_aggregate = covid_temp %>% mutate(month_year = as.character(format(date, "%m-%Y"))) %>%
group_by(month_year) %>%
summarise(date=date[1], number = mean(new_people_vaccinated_smoothed))
covid_temp_new_vaccinated_aggregate ggplot(covid_temp_new_vaccinated_aggregate, aes(x = number, y= month_year)) +
geom_bar(stat = 'identity') + ggtitle("Average New People Vaccinated (Smoothed) per Month") +
ylab("month year") + xlab("average new people vaccinated")df_aus = df_aus[order(as.Date(df_aus$date, format="%d/%m/%Y")),]
colnames(df_aus)[7] = 'Change'
#write.csv(df_aus,"df_aus.csv")#glimpse(df_aus)
df_aus_subset = df_aus %>% select(Price, new_cases)
view(df_aus_subset)
M0 = lm(Price ~ new_cases, data = df_aus_subset) # Null model
summary(M0)##
## Call:
## lm(formula = Price ~ new_cases, data = df_aus_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -549.62 -157.03 60.97 163.28 405.41
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.223e+03 1.525e+01 473.504 <2e-16 ***
## new_cases 1.166e-03 5.762e-04 2.024 0.044 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 226.3 on 262 degrees of freedom
## Multiple R-squared: 0.0154, Adjusted R-squared: 0.01164
## F-statistic: 4.097 on 1 and 262 DF, p-value: 0.04396
Scatter Plot for price and new_cases
y <- df_aus$Price
x <- df_aus$new_cases
plot(x, y, main = "Price ~ New Cases",
ylab = "Price", xlab = "new_cases",
pch = 19, frame = FALSE)
# Add regression line
plot(x, y, main = "Price ~ New Cases",
ylab = "Price", xlab = "new_cases",
pch = 19, frame = FALSE)
abline(lm(y ~ x, data = df_aus), col = "blue")# df_aus_subset = df_aus %>% select(Price, new_deaths) %>% filter(new_deaths != 0)df_aus_subset = df_aus %>% select(Price, new_deaths, new_vaccinations, new_cases)
view(df_aus_subset)
M1 = lm(Price ~ poly(new_vaccinations, degree=2), data=df_aus_subset)
summary(M1)##
## Call:
## lm(formula = Price ~ poly(new_vaccinations, degree = 2), data = df_aus_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -555.39 -72.58 -0.10 77.58 697.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7235.494 8.446 856.70 <2e-16 ***
## poly(new_vaccinations, degree = 2)1 2467.685 137.228 17.98 <2e-16 ***
## poly(new_vaccinations, degree = 2)2 -1620.816 137.228 -11.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 137.2 on 261 degrees of freedom
## Multiple R-squared: 0.6394, Adjusted R-squared: 0.6367
## F-statistic: 231.4 on 2 and 261 DF, p-value: < 2.2e-16
ggplot(data = df_aus_subset, mapping = aes(x = Price, y = new_deaths)) + geom_point() + ggtitle("Prices of stocks and against new death") + ylab("New deaths") + xlab("Price")ggplot(data = df_aus, mapping = aes(x = new_vaccinations, y = Price)) + geom_point() + ggtitle("Prices of stocks against new vaccinations") + xlab("New vaccinations") + ylab("Price")df_aus_subset = df_aus %>% select(Price, new_tests)
view(df_aus_subset)
M2 = lm(Price ~ poly(new_tests, degree=2), data=df_aus_subset)
summary(M2)##
## Call:
## lm(formula = Price ~ poly(new_tests, degree = 2), data = df_aus_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -469.40 -88.38 12.24 86.71 401.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7235.494 8.392 862.236 < 2e-16 ***
## poly(new_tests, degree = 2)1 2767.837 136.347 20.300 < 2e-16 ***
## poly(new_tests, degree = 2)2 -1057.592 136.347 -7.757 1.97e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 136.3 on 261 degrees of freedom
## Multiple R-squared: 0.6441, Adjusted R-squared: 0.6413
## F-statistic: 236.1 on 2 and 261 DF, p-value: < 2.2e-16
ggplot(data = df_aus, mapping = aes(x = new_tests, y = Price)) + geom_point() + ggtitle("Prices of stocks against new tests") + xlab("New tests") + ylab("Price")ggplot(data = df_aus, mapping = aes(x = Price, y = population)) + geom_point() + ggtitle("Prices of stocks against population") + ylab("New tests") + xlab("Price")
ggplot(data = df_aus, mapping = aes(x = Price, y = population_density)) + geom_point() + ggtitle("Prices of stocks against population density") + ylab("New tests") + xlab("Price")
ggplot(data = df_aus, mapping = aes(x = date, y = population_density)) + geom_line() + ggtitle("Prices of stocks against population density") + ylab("New tests") + xlab("Price") # bad
summary(df_aus$Price)df_aus$Price_mul30 = df_aus$Price*30
p1 = ggplot(data = df_aus) + geom_line(aes(x=date, y = new_cases), color = "red") + geom_line(aes(x=date, y = new_vaccinations), color = "light green") + geom_line(aes(x=date, y = new_tests), color = "light blue") +
geom_line(aes(x=date, y = Price_mul30), color = "blue") + theme_bw()
df_aus$Price_mul30 = df_aus$Price*30
p1 = ggplot(data = df_aus) + geom_line(aes(x=date, y = new_cases), color = "red") + geom_line(aes(x=date, y = new_vaccinations), color = "light green") + geom_line(aes(x=date, y = new_tests), color = "light blue")+ theme_bw()
ggplotly(p1)autoplot(M0, which = 1:2)
autoplot(M1, which = 1:2)
autoplot(M2, which = 1:2)M = lm(Price ~ polym(new_tests, new_vaccinations, degree=2, raw=TRUE), data=df_aus)
summary(M)##
## Call:
## lm(formula = Price ~ polym(new_tests, new_vaccinations, degree = 2,
## raw = TRUE), data = df_aus)
##
## Residuals:
## Min 1Q Median 3Q Max
## -484.56 -59.44 7.11 68.74 320.66
##
## Coefficients:
## Estimate
## (Intercept) 6.652e+03
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.0 2.773e-03
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)2.0 -2.754e-09
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.1 3.958e-03
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.1 -2.408e-09
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.2 -8.281e-09
## Std. Error
## (Intercept) 2.498e+01
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.0 4.246e-04
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)2.0 1.455e-09
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.1 3.417e-04
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.1 1.588e-09
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.2 9.333e-10
## t value Pr(>|t|)
## (Intercept) 266.296 < 2e-16
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.0 6.531 3.45e-10
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)2.0 -1.892 0.0596
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.1 11.585 < 2e-16
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.1 -1.516 0.1307
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.2 -8.873 < 2e-16
##
## (Intercept) ***
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.0 ***
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)2.0 .
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.1 ***
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)1.1
## polym(new_tests, new_vaccinations, degree = 2, raw = TRUE)0.2 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 110.5 on 258 degrees of freedom
## Multiple R-squared: 0.7688, Adjusted R-squared: 0.7643
## F-statistic: 171.5 on 5 and 258 DF, p-value: < 2.2e-16
ggplot(data = df_aus) + geom_point(aes(x = new_tests, y = Price), color = "light blue") +
geom_point(aes(x = new_vaccinations, y = Price), color = "light green") +
ggtitle("Prices of stocks against new tests and new_vaccinations") + ylab("Price") +theme_classic()covid_clean_AUS <- df_aus
percentage_tests <- (covid_clean_AUS$new_tests/ covid_clean_AUS$total_tests)*100
vec_percentage_tests <- c(percentage_tests)
covid_clean_AUS <-cbind(covid_clean_AUS, perc_new_tests = vec_percentage_tests)
covid_clean_AUS <- covid_clean_AUS
percentage_cases <- (covid_clean_AUS$new_cases/ covid_clean_AUS$total_cases)*100
vec_percentage_cases <- c(percentage_cases)
covid_clean_AUS <-cbind(covid_clean_AUS, perc_new_cases = vec_percentage_cases)
covid_clean_AUS <- covid_clean_AUS
percentage_vacc <- (covid_clean_AUS$new_vaccinations/ covid_clean_AUS$total_vaccinations)*100
vec_percentage_vacc <- c(percentage_vacc)
covid_clean_AUS <-cbind(covid_clean_AUS, perc_new_vacc = vec_percentage_vacc)
df_aus_Q2 <- covid_clean_AUS
df_aus_Q2$lprice = log(df_aus_Q2$Price)
view(df_aus_Q2)ggplot(data = df_aus_Q2) + geom_point(aes(x = perc_new_tests, y = lprice), color = "light blue") +
geom_point(aes(x = perc_new_vacc, y = lprice), color = "light green") +
geom_point(aes(x = perc_new_cases, y = lprice), color = "red") +
ggtitle("Prices of stocks againsts growth rate of new cases, new vaccinations and new tests") + xlab("growth rate") + ylab("log(Price)") +theme_classic()
Very bad plot….